EDA_Vanessa

Author

Vanessa

pacman::p_load(tidyverse, ggstatsplot, plotly, ggplot2, ggdist, dplyr)
# Collect the path of every CSV file under "WeatherStation_Cleaned"
# (sub-folders included), then read them all with a single read_csv() call.
weather_list <- list.files(
  "WeatherStation_Cleaned",
  pattern = "\\.csv$",
  full.names = TRUE,
  recursive = TRUE
)

weather <- read_csv(file = weather_list)
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)
Rows: 163904 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): Station, Daily Rainfall Total, Highest 30 Min Rainfall, Highest 120...
dbl (9): Year, Month, Day, Daily Rainfall Total (mm), Mean Temperature, Maxi...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep all 13 columns and give columns 5-13 (selected by position) short,
# syntactic names.
weather <- weather %>% 
          select(1:13, "DailyRainfall" = 5, "HighestRainfall30" = 6,
                 "HighestRainfall60" = 7, "HighestRainfall120" = 8,
                 "MeanTemp" = 9, "MaxTemp" = 10, "MinTemp" = 11,
                 "MeanWindSpeed" = 12, "MaxWindSpeed" = 13)
# Coerce the date parts: Year and Day become integers; the month number is
# replaced by its three-letter English abbreviation from month.abb.
weather$Year <- as.integer(weather$Year)
weather$Month <- month.abb[weather$Month]
weather$Day <- as.integer(weather$Day)
# Force every measurement column to numeric. Entries that cannot be parsed
# as numbers become NA (hence the coercion warnings below).
weather$DailyRainfall <- as.numeric(weather$DailyRainfall)
weather$HighestRainfall30 <- as.numeric(weather$HighestRainfall30)
Warning: NAs introduced by coercion
weather$HighestRainfall60 <- as.numeric(weather$HighestRainfall60)
Warning: NAs introduced by coercion
weather$HighestRainfall120 <- as.numeric(weather$HighestRainfall120)
Warning: NAs introduced by coercion
weather$MeanTemp <- as.numeric(weather$MeanTemp)
weather$MaxTemp <- as.numeric(weather$MaxTemp)
weather$MinTemp <- as.numeric(weather$MinTemp)
weather$MeanWindSpeed <- as.numeric(weather$MeanWindSpeed)
weather$MaxWindSpeed <- as.numeric(weather$MaxWindSpeed)

# Print the column names, types and first values to check the result.
glimpse(weather)
Rows: 163,904
Columns: 13
$ Station            <chr> "Admiralty", "Admiralty", "Admiralty", "Admiralty",…
$ Year               <int> 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 200…
$ Month              <chr> "Jan", "Jan", "Jan", "Jan", "Jan", "Jan", "Jan", "J…
$ Day                <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
$ DailyRainfall      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ HighestRainfall30  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ HighestRainfall60  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ HighestRainfall120 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ MeanTemp           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ MaxTemp            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ MinTemp            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ MeanWindSpeed      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ MaxWindSpeed       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
# Build a Date column from the Year / Month / Day columns.
# Month holds English abbreviations taken from month.abb, so it is mapped
# back to a month number before parsing. Parsing the abbreviation with "%b"
# depends on the session locale and yields NA in a non-English locale.
# Rows with a missing or impossible date still come out as NA.
weather$DDate <- as.Date(
  sprintf(
    "%d-%02d-%02d",
    as.integer(weather$Year),
    match(weather$Month, month.abb),
    as.integer(weather$Day)
  ),
  format = "%Y-%m-%d"
)

# Monthly temperature summary per station:
#   AveMeanTemp - mean of the daily mean temperatures
#   MaxMaxTemp  - highest daily maximum in the month
#   MinMinTemp  - lowest daily minimum in the month
# Missing readings are ignored in all three statistics, so a single NA day
# no longer turns the whole month's max/min into NA. A month with no
# readings at all yields NA (not +/-Inf with a warning).
# The result stays grouped by Station and Year (summarise() default).
Temp_month <- weather %>%
  group_by(Station, Year, Month) %>%
  summarise(
    AveMeanTemp = mean(MeanTemp, na.rm = TRUE),
    MaxMaxTemp = if (all(is.na(MaxTemp))) {
      NA_real_
    } else {
      max(MaxTemp, na.rm = TRUE)
    },
    MinMinTemp = if (all(is.na(MinTemp))) {
      NA_real_
    } else {
      min(MinTemp, na.rm = TRUE)
    }
  )
`summarise()` has grouped output by 'Station', 'Year'. You can override using
the `.groups` argument.
# Add a running month index: January 1981 is month 1, January 1982 is
# month 13, and so on.
Temp_monthYr <- mutate(
  Temp_month,
  MonthOfYear = (Year - 1981) * 12 + match(Month, month.abb)
)

# Yearly mean of the daily mean temperature with its standard error.
#   n    - number of NON-missing MeanTemp readings in the year (rows with
#          NA are excluded, matching what mean()/sd() actually use)
#   Temp - yearly mean
#   sd   - sample standard deviation
#   se   - standard error of the mean, sd / sqrt(n)
Temp_yr_error <- weather %>%
  group_by(Year) %>%
  summarise(
    n = sum(!is.na(MeanTemp)),
    Temp = mean(MeanTemp, na.rm = TRUE),
    sd = sd(MeanTemp, na.rm = TRUE)
  ) %>%
  mutate(se = sd / sqrt(n))
# Restrict the monthly series to the Changi station.
Temp <- filter(Temp_monthYr, Station == "Changi")
# Monthly mean temperature at Changi against the running month index, with
# a cubic B-spline trend line.
# Axis breaks sit on every 120th month (January of 1981, 1991, ...) and are
# generated only up to the last month present in the data, instead of a
# fixed 42 decades. The hover text shows the temperature rounded to 2 dp.
decade_breaks <- seq(
  from = 1,
  to = max(Temp$MonthOfYear, na.rm = TRUE),
  by = 120
)

gg <- ggplot(Temp, aes(x = MonthOfYear, y = AveMeanTemp,
                       color = factor(Year))) +
  geom_line(linewidth = 0.1) +
  # `text` is not a ggplot2 aesthetic; it is picked up by ggplotly() as the
  # tooltip content.
  geom_point(aes(text = paste0("Month:", Month,
                               "<br>MeanTemp:", round(AveMeanTemp, 2),
                               "ºC"))) +
  scale_x_continuous(breaks = decade_breaks,
                     labels = 1981 + (decade_breaks - 1) / 12) +
  labs(x = "Year", y = "Monthly mean temperature (ºC)", color = "Year",
       title = "Trend of Monthly Mean Temperature at Changi Station from 1981 to 2023",
       subtitle = "Gentle trend line sloping upwards from 1981",
       caption = "Data from Meteorological Service Singapore website") +
  geom_smooth(method = "lm", formula = y ~ splines::bs(x, 3),
              se = FALSE, color = "black") +
  theme_minimal()
Warning in geom_point(aes(text = paste0("Month:", Month, "<br>MeanTemp:", :
Ignoring unknown aesthetics: text
# Convert the trend plot to an interactive widget. ggplotly() drops the
# ggplot subtitle and caption, so both are re-added through layout():
# the subtitle as a <sup> line under the title, the caption as an annotation.
ggplotly(gg, tooltip = "text") %>%
  layout(
    title = list(
      text = paste0(gg$labels$title, "<br>", "<sup>",
                    gg$labels$subtitle, "</sup>"),
      font = list(weight = "bold")
    ),
    showlegend = FALSE,
    # Leave room under the x-axis for the caption.
    margin = list(b = 90),
    # With xref/yref = "paper" the coordinates are fractions of the plotting
    # area (0 to 1). x = 1, y = -0.15 anchors the caption at the bottom
    # right, just below the axis; the previous x = 1000, y = 24 placed it
    # far outside the figure.
    annotations = list(text = gg$labels$caption,
                       xref = "paper", yref = "paper",
                       x = 1, y = -0.15,
                       xanchor = "right", yanchor = "top",
                       showarrow = FALSE)
  )
Warning: Removed 24 rows containing non-finite values (`stat_smooth()`).
# Linear trend of the yearly mean temperature on calendar year.
model <- lm(Temp ~ Year, data = Temp_yr_error)
y_intercept <- coef(model)[1]
slope_coeff <- coef(model)[2]
# The plot below puts factor(Year) on the x-axis, so the first year sits at
# position 1, the next at 2, and so on. To draw the fitted line on that
# axis the intercept is shifted so that position p maps to calendar year
# (first_year - 1 + p). The first year is taken from the data instead of
# being hard-coded. This assumes the years are consecutive.
adjust_yintercept <- slope_coeff *
  (min(Temp_yr_error$Year, na.rm = TRUE) - 1) + y_intercept

# Yearly mean temperature with a 99% confidence interval (mean +/- 2.58 se)
# and the fitted linear trend drawn as a dashed line.
gg <- ggplot(Temp_yr_error) +
  geom_errorbar(aes(x = factor(Year), ymin = Temp - 2.58 * se,
                    ymax = Temp + 2.58 * se),
                width = 0.2, colour = "black",
                alpha = 0.9, linewidth = 0.5) +
  # `text` is not a ggplot2 aesthetic; ggplotly() uses it as the tooltip.
  # 2.58 is the z-value for a 99% interval, so the tooltip is labelled 99%
  # to agree with the plot title.
  geom_point(aes(x = factor(Year), y = Temp,
                 text = paste0("Year:", `Year`,
                               "<br>Avg. Temp:", round(Temp, digits = 2),
                               "<br>99% CI:[",
                               round((Temp - 2.58 * se), digits = 2), ",",
                               round((Temp + 2.58 * se), digits = 2), "]")),
             color = "darkred",
             size = 1.5, alpha = 1) +
  # Fitted trend line; adjust_yintercept already accounts for the x-axis
  # being year positions (1, 2, ...) rather than calendar years.
  geom_abline(slope = slope_coeff,
              intercept = adjust_yintercept,
              color = "blue",
              linetype = "dashed") +
  # A single fixed label: annotate() draws it once (geom_text() with aes()
  # drew one copy per data row), and the colour is set as a constant rather
  # than mapped inside aes().
  annotate("text", x = 11, y = 27.8, colour = "blue",
           label = paste0("Temp = ",
                          round(slope_coeff, 4), " * Year ",
                          if (y_intercept < 0) "- " else "+ ",
                          abs(round(y_intercept, 4)))) +
  labs(x = "Year", y = "Annual mean temperatures (°C)",
       title = "99% Confidence interval of annual mean temperatures by year",
       subtitle = "From 1982 to 2023",
       caption = "Data from Meteorological Service Singapore website") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1),
        plot.title = element_text(face = "bold", size = 12))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Warning in geom_point(aes(x = factor(Year), y = Temp, text = paste0("Year:", :
Ignoring unknown aesthetics: text
Warning in geom_abline(slope = round(slope_coeff, 4), intercept =
adjust_yintercept, : Ignoring unknown parameters: `untf`
# Interactive version of the confidence-interval plot. The subtitle is
# appended to the title as a <sup> line and the legend is hidden.
layout(
  ggplotly(gg, tooltip = "text"),
  title = list(
    text = paste0(
      gg$labels$title, "<br>", "<sup>", gg$labels$subtitle, "</sup>"
    ),
    font = list(weight = "bold")
  ),
  showlegend = FALSE
)
# Average the station-level monthly means into one value per year and month.
Temp <- summarise(
  group_by(Temp_month, Year, Month),
  MTemp = mean(AveMeanTemp, na.rm = TRUE)
)
`summarise()` has grouped output by 'Year'. You can override using the
`.groups` argument.
# Heatmap of mean temperature: months (in calendar order) across, years
# down, darker blue for higher values.
gg <- ggplot(
  Temp,
  aes(
    x = factor(Month, levels = month.abb),
    y = factor(Year),
    fill = MTemp
  )
) +
  # `text` feeds the plotly tooltip only.
  geom_tile(
    aes(text = paste0(Year, "-", Month,
                      "<br>Temp:", round(MTemp, 2), "°C")),
    color = "white"
  ) +
  theme_minimal() +
  scale_fill_gradient(
    name = "Temperature",
    low = "sky blue",
    high = "dark blue"
  ) +
  labs(
    x = NULL,
    y = NULL,
    title = "Mean temperatures by year and month",
    subtitle = "Hotter in more months of 2023 as compared to the other years"
  )
Warning in geom_tile(color = "white", aes(text = paste0(Year, "-", Month, :
Ignoring unknown aesthetics: text
# Render the heatmap as an interactive plotly widget; the hover tooltip
# shows only the custom `text` aesthetic defined on the tile layer.
ggplotly(gg, tooltip = "text")
Show the code
# Violin plot of daily mean temperature for each calendar month, with the
# overall mean as a dashed reference line and each month's mean as a point.
# The overall mean is computed once and drawn as a single line and a single
# label. Mapping it through aes() on the full data drew one identical line
# and label per data row.
overall_mean_temp <- mean(weather$MeanTemp, na.rm = TRUE)

gg <- ggplot(weather,
             aes(x = factor(Month, levels = month.abb), y = MeanTemp)) +
  geom_violin(color = "navy", fill = "lightblue") +
  geom_hline(yintercept = overall_mean_temp,
             linetype = "dashed", linewidth = 1, colour = "brown") +
  annotate("text", x = 4.5, y = 27.3,
           label = paste0("Mean : ", round(overall_mean_temp, 2), "°C"),
           colour = "brown") +
  # Monthly mean marker; `text` is used by ggplotly() for the tooltip.
  stat_summary(fun = mean, geom = "point",
               shape = 20, size = 3, color = "orange",
               aes(text = paste0("Mean : ", round(after_stat(y), 2), "°C"))) +
  theme_minimal() +
  labs(title = "Daily mean temperature across each month from 1981 to 2023",
       subtitle = "November to February are cooler as compared to the rest of the year",
       y = "Daily mean Temperatures (°C)",
       x = "Month",
       caption = "Data from Meteorological Service Singapore website")
Warning in stat_summary(fun = mean, geom = "point", shape = 20, size = 3, :
Ignoring unknown aesthetics: text
Show the code
# Interactive version of the violin plot, with the subtitle appended to the
# title as a <sup> line.
layout(
  ggplotly(gg, tooltip = "text"),
  title = list(
    text = paste0(
      gg$labels$title, "<br>", "<sup>", gg$labels$subtitle, "</sup>"
    ),
    font = list(weight = "bold")
  )
)
Warning: Removed 58654 rows containing non-finite values (`stat_ydensity()`).
Warning: Removed 58654 rows containing non-finite values (`stat_summary()`).